{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"..\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Import"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from finnlp.data_sources.social_media.twitter import Twitter_Downloader"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "stock = \"AAPL\"\n",
    "start_date = \"2023-01-01\"\n",
    "end_date = \"2023-01-05\""
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Downloader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "downloader = Twitter_Downloader()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f95d73d68fff4354aadfd0482bb52952",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/5 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "downloader.download(start_date, end_date,stock)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(84, 38)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "downloader.dataframe.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>created_at</th>\n",
       "      <th>id</th>\n",
       "      <th>id_str</th>\n",
       "      <th>full_text</th>\n",
       "      <th>truncated</th>\n",
       "      <th>display_text_range</th>\n",
       "      <th>entities</th>\n",
       "      <th>extended_entities</th>\n",
       "      <th>source</th>\n",
       "      <th>in_reply_to_status_id</th>\n",
       "      <th>...</th>\n",
       "      <th>retweeted</th>\n",
       "      <th>possibly_sensitive</th>\n",
       "      <th>possibly_sensitive_editable</th>\n",
       "      <th>lang</th>\n",
       "      <th>supplemental_language</th>\n",
       "      <th>self_thread</th>\n",
       "      <th>quoted_status_id</th>\n",
       "      <th>quoted_status_id_str</th>\n",
       "      <th>quoted_status_permalink</th>\n",
       "      <th>card</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-01-01 01:25:43+00:00</td>\n",
       "      <td>1609360184694157312</td>\n",
       "      <td>1609360184694157312</td>\n",
       "      <td>2022 was the birth this movement. 2023 is when...</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 147]</td>\n",
       "      <td>{'hashtags': [{'text': 'SPY', 'indices': [97, ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>1609360182714241024</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>en</td>\n",
       "      <td>None</td>\n",
       "      <td>{'id': 1609360176640925699, 'id_str': '1609360...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-01-01 03:20:49+00:00</td>\n",
       "      <td>1609389151253835777</td>\n",
       "      <td>1609389151253835777</td>\n",
       "      <td>たくさんコメントありがとうございました☺️</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 21]</td>\n",
       "      <td>{'hashtags': [], 'symbols': [], 'user_mentions...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>None</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ja</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2023-01-01 04:21:54+00:00</td>\n",
       "      <td>1609404522803363846</td>\n",
       "      <td>1609404522803363846</td>\n",
       "      <td>The fall of Apple iphone market share.\\n- peak...</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 212]</td>\n",
       "      <td>{'hashtags': [], 'symbols': [{'text': 'AAPL', ...</td>\n",
       "      <td>{'media': [{'id': 1609404518500032514, 'id_str...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>None</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>en</td>\n",
       "      <td>None</td>\n",
       "      <td>{'id': 1609404522803363846, 'id_str': '1609404...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2023-01-01 04:32:23+00:00</td>\n",
       "      <td>1609407163671400448</td>\n",
       "      <td>1609407163671400448</td>\n",
       "      <td>Apple iphone market share peaked in H1 2009 an...</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 160]</td>\n",
       "      <td>{'hashtags': [], 'symbols': [{'text': 'AAPL', ...</td>\n",
       "      <td>{'media': [{'id': 1609407158696972289, 'id_str...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>1609404522803363846</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>en</td>\n",
       "      <td>None</td>\n",
       "      <td>{'id': 1609404522803363846, 'id_str': '1609404...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2023-01-01 04:43:47+00:00</td>\n",
       "      <td>1609410032734711809</td>\n",
       "      <td>1609410032734711809</td>\n",
       "      <td>That sounds impossible if we look at how fast ...</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 272]</td>\n",
       "      <td>{'hashtags': [{'text': 'iPhone', 'indices': [2...</td>\n",
       "      <td>{'media': [{'id': 1609410028653645824, 'id_str...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>1609407163671400448</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>en</td>\n",
       "      <td>None</td>\n",
       "      <td>{'id': 1609404522803363846, 'id_str': '1609404...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>2023-01-04 21:45:11+00:00</td>\n",
       "      <td>1610754237004189710</td>\n",
       "      <td>1610754237004189710</td>\n",
       "      <td>APPLE $AAPL TO SIGN UP LUXSHARE TO PRODUCE IPH...</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 64]</td>\n",
       "      <td>{'hashtags': [], 'symbols': [{'text': 'AAPL', ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>None</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>en</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>2023-01-04 22:21:45+00:00</td>\n",
       "      <td>1610763442092183585</td>\n",
       "      <td>1610763442092183585</td>\n",
       "      <td>$AAPL https://t.co/Fb8UbPUy9S</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 5]</td>\n",
       "      <td>{'hashtags': [], 'symbols': [{'text': 'AAPL', ...</td>\n",
       "      <td>{'media': [{'id': 1610763438053068835, 'id_str...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>None</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>pl</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>81</th>\n",
       "      <td>2023-01-04 22:53:05+00:00</td>\n",
       "      <td>1610771324355346432</td>\n",
       "      <td>1610771324355346432</td>\n",
       "      <td>$AAPL This doesn't fix their demand issues fol...</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 49]</td>\n",
       "      <td>{'hashtags': [], 'symbols': [{'text': 'AAPL', ...</td>\n",
       "      <td>{'media': [{'id': 1610771243019689984, 'id_str...</td>\n",
       "      <td>&lt;a href=\"https://mobile.twitter.com\" rel=\"nofo...</td>\n",
       "      <td>None</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>en</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>2023-01-04 23:10:20+00:00</td>\n",
       "      <td>1610775668924583936</td>\n",
       "      <td>1610775668924583936</td>\n",
       "      <td>These TOP companies have cash in the bank!\\n$A...</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 173]</td>\n",
       "      <td>{'hashtags': [], 'symbols': [{'text': 'AAPL', ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>None</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>en</td>\n",
       "      <td>None</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>2023-01-04 23:10:46+00:00</td>\n",
       "      <td>1610775777083006976</td>\n",
       "      <td>1610775777083006976</td>\n",
       "      <td>Darvas strategy. \\n\\nPart 15• \\n\\nAlways speak...</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 122]</td>\n",
       "      <td>{'hashtags': [], 'symbols': [{'text': 'MSFT', ...</td>\n",
       "      <td>{'media': [{'id': 1610775771181682690, 'id_str...</td>\n",
       "      <td>&lt;a href=\"http://twitter.com/download/iphone\" r...</td>\n",
       "      <td>None</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>en</td>\n",
       "      <td>None</td>\n",
       "      <td>{'id': 1610775777083006976, 'id_str': '1610775...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>84 rows × 38 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  created_at                   id               id_str  \\\n",
       "0  2023-01-01 01:25:43+00:00  1609360184694157312  1609360184694157312   \n",
       "1  2023-01-01 03:20:49+00:00  1609389151253835777  1609389151253835777   \n",
       "2  2023-01-01 04:21:54+00:00  1609404522803363846  1609404522803363846   \n",
       "3  2023-01-01 04:32:23+00:00  1609407163671400448  1609407163671400448   \n",
       "4  2023-01-01 04:43:47+00:00  1609410032734711809  1609410032734711809   \n",
       "..                       ...                  ...                  ...   \n",
       "79 2023-01-04 21:45:11+00:00  1610754237004189710  1610754237004189710   \n",
       "80 2023-01-04 22:21:45+00:00  1610763442092183585  1610763442092183585   \n",
       "81 2023-01-04 22:53:05+00:00  1610771324355346432  1610771324355346432   \n",
       "82 2023-01-04 23:10:20+00:00  1610775668924583936  1610775668924583936   \n",
       "83 2023-01-04 23:10:46+00:00  1610775777083006976  1610775777083006976   \n",
       "\n",
       "                                            full_text truncated  \\\n",
       "0   2022 was the birth this movement. 2023 is when...     False   \n",
       "1                               たくさんコメントありがとうございました☺️     False   \n",
       "2   The fall of Apple iphone market share.\\n- peak...     False   \n",
       "3   Apple iphone market share peaked in H1 2009 an...     False   \n",
       "4   That sounds impossible if we look at how fast ...     False   \n",
       "..                                                ...       ...   \n",
       "79  APPLE $AAPL TO SIGN UP LUXSHARE TO PRODUCE IPH...     False   \n",
       "80                      $AAPL https://t.co/Fb8UbPUy9S     False   \n",
       "81  $AAPL This doesn't fix their demand issues fol...     False   \n",
       "82  These TOP companies have cash in the bank!\\n$A...     False   \n",
       "83  Darvas strategy. \\n\\nPart 15• \\n\\nAlways speak...     False   \n",
       "\n",
       "   display_text_range                                           entities  \\\n",
       "0            [0, 147]  {'hashtags': [{'text': 'SPY', 'indices': [97, ...   \n",
       "1             [0, 21]  {'hashtags': [], 'symbols': [], 'user_mentions...   \n",
       "2            [0, 212]  {'hashtags': [], 'symbols': [{'text': 'AAPL', ...   \n",
       "3            [0, 160]  {'hashtags': [], 'symbols': [{'text': 'AAPL', ...   \n",
       "4            [0, 272]  {'hashtags': [{'text': 'iPhone', 'indices': [2...   \n",
       "..                ...                                                ...   \n",
       "79            [0, 64]  {'hashtags': [], 'symbols': [{'text': 'AAPL', ...   \n",
       "80             [0, 5]  {'hashtags': [], 'symbols': [{'text': 'AAPL', ...   \n",
       "81            [0, 49]  {'hashtags': [], 'symbols': [{'text': 'AAPL', ...   \n",
       "82           [0, 173]  {'hashtags': [], 'symbols': [{'text': 'AAPL', ...   \n",
       "83           [0, 122]  {'hashtags': [], 'symbols': [{'text': 'MSFT', ...   \n",
       "\n",
       "                                    extended_entities  \\\n",
       "0                                                 NaN   \n",
       "1                                                 NaN   \n",
       "2   {'media': [{'id': 1609404518500032514, 'id_str...   \n",
       "3   {'media': [{'id': 1609407158696972289, 'id_str...   \n",
       "4   {'media': [{'id': 1609410028653645824, 'id_str...   \n",
       "..                                                ...   \n",
       "79                                                NaN   \n",
       "80  {'media': [{'id': 1610763438053068835, 'id_str...   \n",
       "81  {'media': [{'id': 1610771243019689984, 'id_str...   \n",
       "82                                                NaN   \n",
       "83  {'media': [{'id': 1610775771181682690, 'id_str...   \n",
       "\n",
       "                                               source in_reply_to_status_id  \\\n",
       "0   <a href=\"https://mobile.twitter.com\" rel=\"nofo...   1609360182714241024   \n",
       "1   <a href=\"http://twitter.com/download/iphone\" r...                  None   \n",
       "2   <a href=\"http://twitter.com/download/iphone\" r...                  None   \n",
       "3   <a href=\"http://twitter.com/download/iphone\" r...   1609404522803363846   \n",
       "4   <a href=\"http://twitter.com/download/iphone\" r...   1609407163671400448   \n",
       "..                                                ...                   ...   \n",
       "79  <a href=\"https://mobile.twitter.com\" rel=\"nofo...                  None   \n",
       "80  <a href=\"http://twitter.com/download/iphone\" r...                  None   \n",
       "81  <a href=\"https://mobile.twitter.com\" rel=\"nofo...                  None   \n",
       "82  <a href=\"http://twitter.com/download/iphone\" r...                  None   \n",
       "83  <a href=\"http://twitter.com/download/iphone\" r...                  None   \n",
       "\n",
       "    ... retweeted possibly_sensitive possibly_sensitive_editable lang  \\\n",
       "0   ...     False                NaN                         NaN   en   \n",
       "1   ...     False                NaN                         NaN   ja   \n",
       "2   ...     False              False                        True   en   \n",
       "3   ...     False              False                        True   en   \n",
       "4   ...     False              False                        True   en   \n",
       "..  ...       ...                ...                         ...  ...   \n",
       "79  ...     False                NaN                         NaN   en   \n",
       "80  ...     False              False                        True   pl   \n",
       "81  ...     False              False                        True   en   \n",
       "82  ...     False                NaN                         NaN   en   \n",
       "83  ...     False              False                        True   en   \n",
       "\n",
       "   supplemental_language                                        self_thread  \\\n",
       "0                   None  {'id': 1609360176640925699, 'id_str': '1609360...   \n",
       "1                   None                                                NaN   \n",
       "2                   None  {'id': 1609404522803363846, 'id_str': '1609404...   \n",
       "3                   None  {'id': 1609404522803363846, 'id_str': '1609404...   \n",
       "4                   None  {'id': 1609404522803363846, 'id_str': '1609404...   \n",
       "..                   ...                                                ...   \n",
       "79                  None                                                NaN   \n",
       "80                  None                                                NaN   \n",
       "81                  None                                                NaN   \n",
       "82                  None                                                NaN   \n",
       "83                  None  {'id': 1610775777083006976, 'id_str': '1610775...   \n",
       "\n",
       "   quoted_status_id quoted_status_id_str quoted_status_permalink card  \n",
       "0               NaN                  NaN                     NaN  NaN  \n",
       "1               NaN                  NaN                     NaN  NaN  \n",
       "2               NaN                  NaN                     NaN  NaN  \n",
       "3               NaN                  NaN                     NaN  NaN  \n",
       "4               NaN                  NaN                     NaN  NaN  \n",
       "..              ...                  ...                     ...  ...  \n",
       "79              NaN                  NaN                     NaN  NaN  \n",
       "80              NaN                  NaN                     NaN  NaN  \n",
       "81              NaN                  NaN                     NaN  NaN  \n",
       "82              NaN                  NaN                     NaN  NaN  \n",
       "83              NaN                  NaN                     NaN  NaN  \n",
       "\n",
       "[84 rows x 38 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "downloader.dataframe"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "finrl",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
